Three Sarcoidosis Bronchoalveolar lavage (BAL) datasets were sequenced by Illumina NovaSeq 6000 and processed by Pipeline-Version: cellranger-7.1.0
Ten healthy bronchoalveolar lavage (BAL) datasets were sequenced on an Illumina NovaSeq 6000 and processed with an older Cell Ranger pipeline version. All healthy donors were aged 18–50 years: https://cells.ucsc.edu/?ds=healthy-bal
# Import necessary libraries
import scanpy as sc #software suite of tools for single-cell analysis in python
import besca as bc #internal BEDA package for single cell analysis
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy.external as sce
from harmony import harmonize
import umap.umap_ as umap
import os
from scipy import io
# Print the installed anndata version so it is recorded with the notebook output
print(ad.__version__)
# Set scanpy's logging level to the most verbose setting listed below
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpgeu6z05w INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpgeu6z05w/_remote_module_non_scriptable.py INFO:lightning_fabric.utilities.seed:Global seed set to 0
0.9.1
Loading all BAL datasets into the workspace
balsarc1: SAM24412249-Sarcoidosis_Donor1_BAL-male-57yrs-white from Genentech (10x Genomics Chromium v3.1 3’ NovaSeq 6000)
balsarc2: SAM24412250 Sarcoidosis_Donor2_PBMC: male-35yrs-southasian sequenced by Genentech (10x Genomics Chromium v3.1 3’ NovaSeq 6000)
balsarc3: SAM24412251 Sarcoidosis_Donor3_PBMC: female-60yrs-white sequenced by Genentech (10x Genomics Chromium v3.1 3’ NovaSeq 6000)
10 Healthy BAL samples collected from NCBI GEO link
Cite: Mould KJ, Moore CM, McManus SA, McCubbrey AL, McClendon JD, Griesmer CL, Henson PM, Janssen WJ. Airspace Macrophages and Monocytes Exist in Transcriptionally Distinct Subsets in Healthy Adults. Am J Respir Crit Care Med. 2021 Apr 15;203(8):946-956. doi: 10.1164/rccm.202005-1989OC. PMID: 33079572; PMCID: PMC8048748. Format:
balhealthy1: GSM4593888. For more info Link
balhealthy2: GSM4593889. For more info Link
balhealthy3: GSM4593890. For more info Link
balhealthy4: GSM4593891. For more info Link
balhealthy5: GSM4593892. For more info Link
balhealthy6: GSM4593893. For more info Link
balhealthy7: GSM4593894. For more info Link
balhealthy8: GSM4593895. For more info Link
balhealthy9: GSM4593896. For more info Link
balhealthy10: GSM4593897. For more info Link
# Load every BAL dataset (3 sarcoidosis, 10 healthy controls) into the workspace.
# NOTE(review): the notes above list balsarc2/balsarc3 as SAM24412250/SAM24412251,
# while the paths below read SAM24412251/SAM24412253 -- confirm which is intended.
def _read_bal_matrix(matrix_dir):
    """Read one 10x matrix directory, using gene symbols as variable names.

    ``cache=True`` is passed through to ``sc.read_10x_mtx``.
    """
    return sc.read_10x_mtx(matrix_dir, var_names='gene_symbols', cache=True)


# Sarcoidosis BAL datasets 1-3
balsarc1 = _read_bal_matrix('/raid02/Data-live/tjana/LIB5455298_SAM24412249/outs/filtered_feature_bc_matrix/')
balsarc2 = _read_bal_matrix('/raid02/Data-live/tjana/LIB5455300_SAM24412251/outs/filtered_feature_bc_matrix/')
balsarc3 = _read_bal_matrix('/raid02/Data-live/tjana/LIB5455302_SAM24412253/outs/filtered_feature_bc_matrix/')

# Healthy BAL controls 1-10 (one GEO sample accession per directory)
balhealthy1 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593888/')
balhealthy2 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593889/')
balhealthy3 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593890/')
balhealthy4 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593891/')
balhealthy5 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593892/')
balhealthy6 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593893/')
balhealthy7 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593894/')
balhealthy8 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593895/')
balhealthy9 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593896/')
balhealthy10 = _read_bal_matrix('/raid02/Data-live/tjana/healthy_BAL/GSM4593897/')
... reading from cache file cache/raid02-Data-live-tjana-LIB5455298_SAM24412249-outs-filtered_feature_bc_matrix-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-LIB5455300_SAM24412251-outs-filtered_feature_bc_matrix-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-LIB5455302_SAM24412253-outs-filtered_feature_bc_matrix-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593888-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593889-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593890-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593891-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593892-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593893-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593894-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593895-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593896-matrix.h5ad ... reading from cache file cache/raid02-Data-live-tjana-healthy_BAL-GSM4593897-matrix.h5ad
# De-duplicate the gene names (var_names) of every sample:
# the three sarcoidosis datasets first, then the ten healthy controls.
for _bal_sample in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    _bal_sample.var_names_make_unique()
Adding some metadata for all BAL samples
# Tag every cell of each BAL dataset with its condition ('type') and sample label ('sample')
_SAMPLE_METADATA = (
    (balsarc1, "Sarcoidosis", "BAL-Sarc-1"),
    (balsarc2, "Sarcoidosis", "BAL-Sarc-2"),
    (balsarc3, "Sarcoidosis", "BAL-Sarc-3"),
    (balhealthy1, "Healthy", "BAL-healthy-1"),
    (balhealthy2, "Healthy", "BAL-healthy-2"),
    (balhealthy3, "Healthy", "BAL-healthy-3"),
    (balhealthy4, "Healthy", "BAL-healthy-4"),
    (balhealthy5, "Healthy", "BAL-healthy-5"),
    (balhealthy6, "Healthy", "BAL-healthy-6"),
    (balhealthy7, "Healthy", "BAL-healthy-7"),
    (balhealthy8, "Healthy", "BAL-healthy-8"),
    (balhealthy9, "Healthy", "BAL-healthy-9"),
    (balhealthy10, "Healthy", "BAL-healthy-10"),
)
for _bal_sample, _condition, _label in _SAMPLE_METADATA:
    _bal_sample.obs['type'] = _condition
    _bal_sample.obs['sample'] = _label
Explore the loaded data before preprocessing for each dataset using a for loop
# Show the summary (dimensions, obs/var columns) of each freshly loaded dataset
for sample_adata in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    display(sample_adata)
AnnData object with n_obs × n_vars = 11727 × 36601
obs: 'type', 'sample'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 13340 × 36601
obs: 'type', 'sample'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 5883 × 36601
obs: 'type', 'sample'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 6816 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 6719 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5007 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5265 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5826 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5423 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5773 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 6690 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 2971 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 4356 × 33538
obs: 'type', 'sample'
var: 'gene_ids'
Computing chromosome Y expression (male marker) and XIST expression (female marker)
# chromosomeY: males and XIST (X-inactive specific transcript):female
# Memo of BioMart results keyed by (species, requested attributes), so that
# repeated calls with the same arguments do not repeat the same query.
_BIOMART_ANNOTATION_CACHE = {}


def get_biomart_annotations(species, gene_info):
    """Return BioMart gene annotations indexed by gene symbol.

    Parameters
    ----------
    species
        Organism identifier passed to ``sc.queries.biomart_annotations``
        (e.g. ``"hsapiens"``).
    gene_info
        Attribute name(s) to request; must include ``"external_gene_name"``
        because the result is indexed by that column.

    Returns
    -------
    pandas.DataFrame
        The annotation table with ``external_gene_name`` as its index.
        Identical requests are answered from an in-memory cache.
    """
    # Lists are unhashable, so normalise the attribute collection for the key
    attr_key = gene_info if isinstance(gene_info, str) else tuple(gene_info)
    cache_key = (species, attr_key)
    if cache_key not in _BIOMART_ANNOTATION_CACHE:
        _BIOMART_ANNOTATION_CACHE[cache_key] = sc.queries.biomart_annotations(
            species, gene_info
        ).set_index("external_gene_name")
    # Hand back a copy so callers cannot alter the cached table
    return _BIOMART_ANNOTATION_CACHE[cache_key].copy()
def chromosomeY_adjustment_step1(adata, species="hsapiens", gene_info=None):
    """Return the genes of *adata* that the annotation places on chromosome Y.

    Parameters
    ----------
    adata
        Dataset whose ``var_names`` are gene symbols.
    species
        Organism identifier forwarded to ``get_biomart_annotations``.
    gene_info
        Annotation attributes to request. ``None`` (the default) requests
        Ensembl gene id, gene symbol, start/end position and chromosome name.

    Returns
    -------
    pandas.Index
        Gene symbols present in ``adata.var_names`` whose annotated
        ``chromosome_name`` equals ``"Y"``.
    """
    # Build the default inside the call to avoid a shared mutable default argument
    if gene_info is None:
        gene_info = ["ensembl_gene_id", "external_gene_name", "start_position", "end_position", "chromosome_name"]
    annot = get_biomart_annotations(species, gene_info)
    chrY_symbols = annot.index[annot.chromosome_name == "Y"]
    return adata.var_names.intersection(chrY_symbols)
def calculate_percent_chrY(adata, chrY_genes):
    """Store the per-cell percentage of counts from *chrY_genes* in ``adata.obs``.

    Parameters
    ----------
    adata
        Dataset with a count matrix in ``X`` (sparse or dense).
    chrY_genes
        Gene names (a subset of ``adata.var_names``) to treat as chromosome Y.

    The result is written to ``adata.obs['percent_chrY']``; nothing is returned.
    Cells whose total count is zero get 0 instead of NaN.
    """
    # np.asarray(...).ravel() flattens both the np.matrix returned by sparse
    # sums and the plain ndarray returned by dense sums.
    chrY_counts = np.asarray(adata[:, chrY_genes].X.sum(axis=1)).ravel()
    total_counts = np.asarray(adata.X.sum(axis=1)).ravel()
    fraction = np.zeros(total_counts.shape, dtype=float)
    # Only divide where there is something to divide by; the rest stays 0
    np.divide(chrY_counts, total_counts, out=fraction, where=total_counts > 0)
    adata.obs['percent_chrY'] = fraction * 100
def add_XIST_expression_to_obs(adata):
    """Store each cell's XIST value from ``adata.X`` in ``adata.obs['XIST-counts']``.

    The gene is selected by exact name, so other genes whose names merely
    start with ``XIST`` (for example a de-duplicated ``XIST-1``) are ignored.

    Raises
    ------
    KeyError
        If ``adata.var_names`` contains no gene named ``XIST``.
    """
    xist_mask = np.asarray(adata.var_names == 'XIST')
    if not xist_mask.any():
        raise KeyError("'XIST' not found in adata.var_names")
    # Summing over the selected column(s) yields one value per cell for both
    # sparse and dense X; with a single XIST column it is just that column.
    adata.obs["XIST-counts"] = np.asarray(adata.X[:, xist_mask].sum(axis=1)).ravel()
# Annotate every sample with its chromosome-Y percentage and its XIST counts
for adata in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    chrY_genes = chromosomeY_adjustment_step1(adata)
    calculate_percent_chrY(adata, chrY_genes)
    add_XIST_expression_to_obs(adata)
Explore the datasets after computing chromosome Y (male) and XIST (female) expression
# Show each dataset again; obs should now list 'percent_chrY' and 'XIST-counts'
for sample_adata in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    display(sample_adata)
AnnData object with n_obs × n_vars = 11727 × 36601
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 13340 × 36601
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 5883 × 36601
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 6816 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 6719 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5007 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5265 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5826 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5423 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5773 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 6690 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 2971 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 4356 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
# Temporarily combine all samples to draw per-sample violin plots of the
# XIST counts and chromosome-Y percentage, then discard the combined object.
# anndata.concat replaces the deprecated AnnData.concatenate; the arguments
# below keep the shared genes, add a 'batch' column and suffix the barcodes
# with the batch number.
adata = ad.concat(
    [
        balsarc1, balsarc2, balsarc3,
        balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
        balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
    ],
    join="inner",
    label="batch",
    index_unique="-",
)
sc.pl.violin(adata, ["XIST-counts", "percent_chrY"], jitter=0.4, groupby='sample', rotation=90)
del adata
/home/jana/my-notebook-venv/lib/python3.8/site-packages/anndata/_core/anndata.py:1755: FutureWarning: The AnnData.concatenate method is deprecated in favour of the anndata.concat function. Please use anndata.concat instead. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html ... storing 'type' as categorical ... storing 'sample' as categorical ... storing 'feature_types-0' as categorical ... storing 'feature_types-1' as categorical ... storing 'feature_types-2' as categorical
# Show the per-sample objects once more after the combined plot
for sample_adata in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    display(sample_adata)
AnnData object with n_obs × n_vars = 11727 × 36601
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 13340 × 36601
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 5883 × 36601
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 6816 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 6719 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5007 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5265 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5826 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5423 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 5773 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 6690 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 2971 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
AnnData object with n_obs × n_vars = 4356 × 33538
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts'
var: 'gene_ids'
# Plot and tabulate the most highly expressed genes of every dataset
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]


def highest_expr_genes(adata_list, n_top=20):
    """Plot and print the top expressed genes of each dataset in *adata_list*.

    Parameters
    ----------
    adata_list
        Datasets to summarise, in the order used for the "sample N" and
        "Dataset N" labels (numbered from 1).
    n_top
        Number of genes to show in each plot and table.

    For every dataset this draws ``sc.pl.highest_expr_genes`` in its own
    figure, then prints a table of the *n_top* genes ranked by their summed
    values in ``X``. Nothing is returned.
    """
    for sample_no, adata in enumerate(adata_list, start=1):
        sc.pl.highest_expr_genes(adata, n_top=n_top, show=False)
        plt.title(f'sample {sample_no}')
        plt.show()
        # Tabular form: sum X over cells directly (no intermediate subset copy);
        # np.asarray(...).ravel() handles both sparse and dense matrices.
        gene_totals = np.asarray(adata.X.sum(axis=0)).ravel()
        df = pd.DataFrame(gene_totals, index=adata.var_names, columns=['Total Expression'])
        df = df.sort_values(by='Total Expression', ascending=False)[:n_top]
        print(f"Top {n_top} expressed genes in Dataset {sample_no}:")
        print(df)


# Run the summary for all thirteen datasets
highest_expr_genes(adata_list, n_top=20)
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 1:
Total Expression
FTL 4062825.0
FTH1 2497482.0
B2M 1097072.0
MALAT1 1056347.0
HLA-DRA 1021517.0
MT-CO1 999195.0
MT-CO3 989242.0
TMSB4X 968801.0
MT-CO2 882213.0
CD74 730477.0
HLA-DRB1 727767.0
MT-ND3 633884.0
LYZ 564110.0
MT-ATP6 546354.0
MT-CYB 501582.0
APOC1 498152.0
TPT1 474481.0
EEF1A1 463938.0
VIM 453250.0
MT-ND4 451690.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 2:
Total Expression
FTL 3141337.0
FTH1 1027058.0
MALAT1 825806.0
MT-CO1 682509.0
B2M 679199.0
MT-CO3 648213.0
TMSB4X 639935.0
HLA-DRA 544669.0
MT-CO2 498588.0
LYZ 482202.0
MT-ATP6 461357.0
APOC1 458132.0
HLA-DRB1 413790.0
CD74 403105.0
MT-ND3 386432.0
MT-CYB 297518.0
TPT1 271147.0
MT-ND4 256590.0
TMSB10 244787.0
EEF1A1 243159.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 3:
Total Expression
FTL 4102206.0
FTH1 1976169.0
MT-CO1 968837.0
MT-CO3 962896.0
MALAT1 954740.0
TMSB4X 940683.0
MT-CO2 894676.0
B2M 877783.0
MT-ATP6 656765.0
MT-ND3 562322.0
HLA-DRA 540661.0
MT-CYB 515631.0
MT-ND4 421075.0
CD74 382996.0
HLA-DRB1 373436.0
MT-ND1 371728.0
MT-ND2 363022.0
VIM 361412.0
S100A6 345059.0
APOC1 343322.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 4:
Total Expression
FTL 10395906.0
FTH1 5451005.0
MT-CO3 2731158.0
MALAT1 2543177.0
MT-CO1 2472719.0
TMSB4X 1905243.0
MT-CO2 1852347.0
MT-ND3 1831130.0
MT-ATP6 1775498.0
B2M 1662683.0
HLA-DRA 1471854.0
MT-CYB 1387605.0
MT-ND4 1181140.0
APOC1 1104697.0
MT-ND1 1033697.0
TPT1 1012083.0
LYZ 939680.0
EEF1A1 905331.0
CD74 863275.0
MT-ND2 841362.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 5:
Total Expression
FTL 10598689.0
FTH1 4131785.0
MT-CO3 2599068.0
MALAT1 2559522.0
MT-CO1 2450207.0
MT-CO2 1987583.0
TMSB4X 1935362.0
MT-ATP6 1651073.0
MT-CYB 1599830.0
MT-ND3 1522507.0
B2M 1515426.0
MT-ND4 1325162.0
APOC1 1206615.0
LYZ 1156868.0
HLA-DRA 1148836.0
MT-ND1 1101885.0
MT-ND2 927675.0
S100A6 861823.0
VIM 823698.0
TPT1 817241.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 6:
Total Expression
FTL 7165733.0
FTH1 3613302.0
MT-CO3 2817990.0
MT-CO1 2572089.0
MALAT1 2088777.0
MT-CO2 1952390.0
TMSB4X 1821704.0
MT-ATP6 1729661.0
MT-CYB 1723181.0
B2M 1577005.0
MT-ND3 1566510.0
HLA-DRA 1445919.0
MT-ND4 1212223.0
MT-ND1 998701.0
MT-ND2 919433.0
CD74 867264.0
VIM 859888.0
APOC1 819989.0
S100A6 776640.0
TPT1 727816.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 7:
Total Expression
FTL 6717957.0
FTH1 3055321.0
MALAT1 2128891.0
TMSB4X 1402581.0
MT-CO1 1220682.0
B2M 1155019.0
HLA-DRA 1130530.0
MT-CO3 1118556.0
LYZ 948417.0
APOC1 917127.0
MT-ND3 891614.0
HLA-DRB1 841454.0
MT-CO2 792905.0
TPT1 784884.0
MT-ATP6 670676.0
MT-ND1 657484.0
EEF1A1 636714.0
CD74 636650.0
MT-CYB 627246.0
MT-ND4 565462.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 8:
Total Expression
FTL 9209223.0
FTH1 3988021.0
MALAT1 1964667.0
LYZ 1860415.0
TMSB4X 1859977.0
MT-CO3 1851935.0
MT-CO2 1773114.0
MT-CO1 1620421.0
MT-CYB 1567909.0
B2M 1374146.0
APOC1 1326064.0
HLA-DRA 1255156.0
MT-ND4 1240598.0
MT-ND2 1161049.0
MT-ATP6 1160996.0
MT-ND3 1144439.0
TPT1 1003851.0
MT-ND1 958549.0
EEF1A1 917186.0
VIM 841938.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 9:
Total Expression
FTL 6821410.0
FTH1 3314335.0
MT-CO3 3017850.0
MT-CO1 2319422.0
MALAT1 2233772.0
MT-CO2 1983552.0
TMSB4X 1719950.0
MT-ATP6 1705501.0
MT-ND3 1681387.0
HLA-DRA 1503728.0
MT-CYB 1491584.0
MT-ND4 1464066.0
B2M 1389004.0
APOC1 1366991.0
HLA-DRB1 1296554.0
MT-ND1 1289340.0
MT-ND2 1080162.0
LYZ 1008273.0
CD74 936236.0
TPT1 848005.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 10:
Total Expression
FTL 4726097.0
MT-CO3 3273444.0
MT-CO1 2929278.0
FTH1 2743566.0
MT-CO2 2286702.0
MALAT1 2164077.0
MT-CYB 1988158.0
MT-ATP6 1862629.0
MT-ND3 1811964.0
MT-ND4 1679469.0
TMSB4X 1597366.0
MT-ND2 1366825.0
MT-ND1 1317631.0
B2M 1095978.0
HLA-DRA 1093788.0
CD74 789387.0
S100A6 681196.0
TPT1 671329.0
LYZ 646042.0
VIM 633655.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 11:
Total Expression
FTL 9409857.0
FTH1 4087084.0
MALAT1 2195627.0
HLA-DRA 2096456.0
MT-CO3 2059806.0
TMSB4X 2054778.0
B2M 1948718.0
MT-CO2 1606590.0
MT-CO1 1576338.0
MT-ND3 1388747.0
MT-CYB 1368434.0
MT-ATP6 1313638.0
APOC1 1264124.0
MT-ND4 1158546.0
CD74 1072526.0
MT-ND2 1041186.0
TPT1 999011.0
MT-ND1 962553.0
HLA-DRB1 879566.0
S100A11 860008.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 12:
Total Expression
FTL 2539220.0
FTH1 1872641.0
TMSB4X 1058601.0
MT-CO1 641349.0
B2M 408778.0
MALAT1 407957.0
MT-CO3 369802.0
MT-CO2 366424.0
CD74 360652.0
MT-ND2 282371.0
APOC1 280711.0
EEF1A1 249987.0
TMSB10 242339.0
S100A6 236386.0
HLA-DRA 226841.0
RPL41 178863.0
MT-CYB 172857.0
VIM 170940.0
HLA-DRB1 170695.0
MT-ND4 170018.0
normalizing counts per cell
finished (0:00:00)
Top 20 expressed genes in Dataset 13:
Total Expression
FTL 4566062.0
FTH1 2977017.0
MT-CO3 2168220.0
MT-CO1 1894855.0
MT-ATP6 1596445.0
TMSB4X 1380306.0
MT-CO2 1373828.0
MALAT1 1254660.0
MT-CYB 1221261.0
MT-ND3 1057914.0
B2M 998849.0
MT-ND4 931870.0
MT-ND1 896048.0
MT-ND2 756042.0
HLA-DRA 754216.0
VIM 569197.0
APOC1 510769.0
S100A6 503668.0
TPT1 477963.0
S100A11 458586.0
# Basic per-sample filtering: drop cells with fewer than 200 detected genes,
# then drop genes detected in fewer than 3 cells.
print("filtering out genes in less than 3 cells")
# The loop variable is deliberately not called `adata_list`, so the list of
# datasets stored under that name is not overwritten by a single dataset.
for sample_adata in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    sc.pp.filter_cells(sample_adata, min_genes=200)
    sc.pp.filter_genes(sample_adata, min_cells=3)
filtering out genes in less than 3 cells filtered out 36 cells that have less than 200 genes expressed filtered out 16670 genes that are detected in less than 3 cells filtered out 381 cells that have less than 200 genes expressed filtered out 16337 genes that are detected in less than 3 cells filtered out 664 cells that have less than 200 genes expressed filtered out 16117 genes that are detected in less than 3 cells filtered out 291 cells that have less than 200 genes expressed filtered out 14072 genes that are detected in less than 3 cells filtered out 205 cells that have less than 200 genes expressed filtered out 14013 genes that are detected in less than 3 cells filtered out 167 cells that have less than 200 genes expressed filtered out 14315 genes that are detected in less than 3 cells filtered out 108 cells that have less than 200 genes expressed filtered out 15210 genes that are detected in less than 3 cells filtered out 107 cells that have less than 200 genes expressed filtered out 14516 genes that are detected in less than 3 cells filtered out 125 cells that have less than 200 genes expressed filtered out 14425 genes that are detected in less than 3 cells filtered out 172 cells that have less than 200 genes expressed filtered out 14632 genes that are detected in less than 3 cells filtered out 66 cells that have less than 200 genes expressed filtered out 14428 genes that are detected in less than 3 cells filtered out 279 cells that have less than 200 genes expressed filtered out 18574 genes that are detected in less than 3 cells filtered out 237 cells that have less than 200 genes expressed filtered out 16014 genes that are detected in less than 3 cells
# Flag mitochondrial and ribosomal genes, compute QC metrics and plot them for each dataset.
# The loop variable is deliberately not called `adata_list`, so the list of
# datasets stored under that name is not overwritten by a single dataset.
for sample_no, sample_adata in enumerate(
    (
        balsarc1, balsarc2, balsarc3,
        balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
        balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
    ),
    start=1,
):
    sample_adata.var['mt'] = sample_adata.var_names.str.startswith('MT-')  # mitochondrial genes 'MT-'
    sample_adata.var['ribo'] = sample_adata.var_names.str.startswith(("RPS", "RPL"))  # ribosomal genes 'RPS/RPL'
    sc.pp.calculate_qc_metrics(sample_adata, qc_vars=['mt', 'ribo'], percent_top=None, log1p=False, inplace=True)
    display("sample no" + str(sample_no))
    sc.pl.violin(
        sample_adata,
        ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_ribo'],
        jitter=0.4,
        multi_panel=True,
    )
'sample no1'
... storing 'type' as categorical ... storing 'sample' as categorical ... storing 'feature_types' as categorical
'sample no2'
... storing 'type' as categorical ... storing 'sample' as categorical ... storing 'feature_types' as categorical
'sample no3'
... storing 'type' as categorical ... storing 'sample' as categorical ... storing 'feature_types' as categorical
'sample no4'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no5'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no6'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no7'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no8'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no9'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no10'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no11'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no12'
... storing 'type' as categorical ... storing 'sample' as categorical
'sample no13'
... storing 'type' as categorical ... storing 'sample' as categorical
# Show each dataset after filtering and QC-metric computation.
# The loop variable is deliberately not called `adata_list`, so the list of
# datasets stored under that name is not overwritten by a single dataset.
for sample_adata in (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
):
    display(sample_adata)
AnnData object with n_obs × n_vars = 11691 × 19931
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 12959 × 20264
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 5219 × 20484
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 6525 × 19466
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 6514 × 19525
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 4840 × 19223
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 5157 × 18328
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 5719 × 19022
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 5298 × 19113
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 5601 × 18906
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 6624 × 19110
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 2692 × 14964
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
AnnData object with n_obs × n_vars = 4119 × 17524
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
def _filter_cells_by_qc(adata, max_genes, max_counts, max_pct_mt, max_pct_ribo):
    """Return a copy of *adata* restricted to cells below every QC threshold.

    Parameters
    ----------
    adata
        Dataset whose ``obs`` holds ``n_genes_by_counts``, ``total_counts``,
        ``pct_counts_mt`` and ``pct_counts_ribo``.
    max_genes
        Exclusive upper bound on the number of genes expressed in a cell.
    max_counts
        Exclusive upper bound on the total counts per cell.
    max_pct_mt
        Exclusive upper bound on the percentage of mitochondrial counts.
    max_pct_ribo
        Exclusive upper bound on the percentage of ribosomal counts.

    Returns
    -------
    AnnData
        An independent copy (not a view), so later in-place steps do not act
        on a view of the unfiltered data.
    """
    obs = adata.obs
    keep = (
        (obs.n_genes_by_counts < max_genes)
        & (obs.total_counts < max_counts)
        & (obs.pct_counts_mt < max_pct_mt)
        & (obs.pct_counts_ribo < max_pct_ribo)
    )
    return adata[keep, :].copy()


# Filter cells based on QC metrics; thresholds are set individually per sample
balsarc1 = _filter_cells_by_qc(balsarc1, max_genes=4800, max_counts=23000, max_pct_mt=15, max_pct_ribo=35)
balsarc2 = _filter_cells_by_qc(balsarc2, max_genes=4000, max_counts=17500, max_pct_mt=20, max_pct_ribo=35)
balsarc3 = _filter_cells_by_qc(balsarc3, max_genes=6500, max_counts=40000, max_pct_mt=18, max_pct_ribo=48)
balhealthy1 = _filter_cells_by_qc(balhealthy1, max_genes=8000, max_counts=80000, max_pct_mt=20, max_pct_ribo=35)
balhealthy2 = _filter_cells_by_qc(balhealthy2, max_genes=8000, max_counts=70000, max_pct_mt=19, max_pct_ribo=30)
balhealthy3 = _filter_cells_by_qc(balhealthy3, max_genes=8200, max_counts=90000, max_pct_mt=20, max_pct_ribo=30)
balhealthy4 = _filter_cells_by_qc(balhealthy4, max_genes=7200, max_counts=70000, max_pct_mt=17.5, max_pct_ribo=40)
balhealthy5 = _filter_cells_by_qc(balhealthy5, max_genes=8000, max_counts=80000, max_pct_mt=17.5, max_pct_ribo=30)
balhealthy6 = _filter_cells_by_qc(balhealthy6, max_genes=8000, max_counts=100000, max_pct_mt=20, max_pct_ribo=45)
# Filter cells based on QC metrics
balhealthy7 = balhealthy7[balhealthy7.obs.n_genes_by_counts < 8000, :] #The number of genes expressed in the count matrix
balhealthy7 = balhealthy7[balhealthy7.obs.total_counts < 70000, :] #The total counts per cell
balhealthy7 = balhealthy7[balhealthy7.obs.pct_counts_mt < 40, :] #The percentage of counts in mitochondrial genes
balhealthy7 = balhealthy7[balhealthy7.obs.pct_counts_ribo <35, :] #The percentage of counts in ribosomal genes
# Filter cells based on QC metrics
balhealthy8 = balhealthy8[balhealthy8.obs.n_genes_by_counts < 8000, :] #The number of genes expressed in the count matrix
balhealthy8 = balhealthy8[balhealthy8.obs.total_counts < 80000, :] #The total counts per cell
balhealthy8 = balhealthy8[balhealthy8.obs.pct_counts_mt < 17.5, :] #The percentage of counts in mitochondrial genes
balhealthy8 = balhealthy8[balhealthy8.obs.pct_counts_ribo <35, :] #The percentage of counts in ribosomal genes
# Filter cells based on QC metrics
balhealthy9 = balhealthy9[balhealthy9.obs.n_genes_by_counts < 4500, :] #The number of genes expressed in the count matrix
balhealthy9 = balhealthy9[balhealthy9.obs.total_counts < 35000, :] #The total counts per cell
balhealthy9 = balhealthy9[balhealthy9.obs.pct_counts_mt < 15, :] #The percentage of counts in mitochondrial genes
balhealthy9 = balhealthy9[balhealthy9.obs.pct_counts_ribo <35, :] #The percentage of counts in ribosomal genes
# Filter cells based on QC metrics
balhealthy10 = balhealthy10[balhealthy10.obs.n_genes_by_counts < 7000, :] #The number of genes expressed in the count matrix
balhealthy10 = balhealthy10[balhealthy10.obs.total_counts < 70000, :] #The total counts per cell
balhealthy10 = balhealthy10[balhealthy10.obs.pct_counts_mt < 23, :] #The percentage of counts in mitochondrial genes
balhealthy10 = balhealthy10[balhealthy10.obs.pct_counts_ribo <30, :] #The percentage of counts in ribosomal genes
# Post-filter QC overview: one four-panel violin plot per sample, each preceded
# by the same header line the notebook has always printed.
post_qc_metrics = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt','pct_counts_ribo']
post_qc_panels = [
    ("PostQC for First Sample: balsarc1", balsarc1),
    ("PostQC for Second Sample: balsarc2", balsarc2),
    ("PostQC for Third Sample: balsarc3", balsarc3),
    ("PostQC for Fourth Sample: balhealthy1", balhealthy1),
    ("PostQC for Fifth Sample: balhealthy2", balhealthy2),
    ("PostQC for sixth Sample: balhealthy3", balhealthy3),
    ("PostQC for seventh Sample: balhealthy4", balhealthy4),
    ("PostQC for Eighth Sample: balhealthy5", balhealthy5),
    ("PostQC for Nineth Sample: balhealthy6", balhealthy6),
    ("PostQC for tenth Sample: balhealthy7", balhealthy7),
    ("PostQC for Eleventh Sample: balhealthy8", balhealthy8),
    ("PostQC for Tweleveth Sample: balhealthy9", balhealthy9),
    ("PostQC for Thirteenth Sample: balhealthy10", balhealthy10),
]
for panel_header, panel_sample in post_qc_panels:
    print(panel_header)
    sc.pl.violin(panel_sample, post_qc_metrics, jitter=0.4, multi_panel=True)
PostQC for First Sample: balsarc1
PostQC for Second Sample: balsarc2
PostQC for Third Sample: balsarc3
PostQC for Fourth Sample: balhealthy1
PostQC for Fifth Sample: balhealthy2
PostQC for sixth Sample: balhealthy3
PostQC for seventh Sample: balhealthy4
PostQC for Eighth Sample: balhealthy5
PostQC for Nineth Sample: balhealthy6
PostQC for tenth Sample: balhealthy7
PostQC for Eleventh Sample: balhealthy8
PostQC for Tweleveth Sample: balhealthy9
PostQC for Thirteenth Sample: balhealthy10
# The 13 QC-filtered samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Library-size correction: rescale every cell so that its counts over all
# genes sum to 10,000 (target_sum=1e4). Each sample is modified in place.
for adata in adata_list:
    sc.pp.normalize_total(adata, target_sum=1e4)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Log-transform each sample in place: X = log(X + 1), natural logarithm.
for adata in adata_list:
    sc.pp.log1p(adata)
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Flag highly variable genes per sample using the mean / dispersion cut-offs
# below; the result is annotated on adata.var (including 'highly_variable').
for adata in adata_list:
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:00)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:00)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Draw the highly-variable-gene diagnostic plot for every sample.
for adata in adata_list:
    sc.pl.highly_variable_genes(adata)
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Keep the current state of each sample in its .raw attribute before the
# regression and scaling steps further down modify X.
for adata in adata_list:
    adata.raw = adata
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Intended step: restrict every sample to its highly variable genes.
# NOTE(review): as written this loop has no effect on the samples. The
# assignment only rebinds the loop variable `adata`; neither the entries of
# adata_list nor balsarc1 ... balhealthy10 are replaced, so all genes remain.
# Making the subset real changes the gene set seen by every later step
# (regression, scaling, any concatenation), so decide deliberately before
# changing it.
for adata in adata_list:
    adata = adata[:, adata.var.highly_variable]
Regressing out the total counts per cell, together with the percentages of mitochondrial and ribosomal gene counts, is a commonly used technique that improves scRNA-seq data analysis by mitigating confounding factors related to cell quality and technical variability. (PMID: 29752298)
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Regress out, per sample and in place, the effects of total counts per cell
# and of the mitochondrial and ribosomal count percentages.
for adata in adata_list:
    sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt','pct_counts_ribo'])
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:09:17)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:09:34)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:33)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:51)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:59)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:03)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:04:49)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:26)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:14)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:05:27)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:06:17)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:03:30)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:04:45)
# The 13 samples: 3 sarcoidosis followed by 10 healthy BAL datasets.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Scale every gene to unit variance in each sample, clipping scaled values
# at 10 (max_value=10).
for adata in adata_list:
    sc.pp.scale(adata, max_value=10)
import copy  # not needed by this cell any more; kept in case later cells rely on the name
import matplotlib.pyplot as plt

# Diagnostic: explained-variance ratio of the leading principal components of
# every sample, used to judge how many PCs are worth keeping.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]

max_pcs = 31  # the sweep covers PCs 1..31

# One PCA per sample is sufficient: the variance ratios of the first k
# components do not depend on how many further components are requested (up to
# solver tolerance). Refitting for every k in 1..31 on top of a deep copy of
# all samples repeated the same decomposition 31 times per sample.
per_sample_ratios = []
for sample in adata_list:
    adata_temp = sample.copy()  # scratch copy: the sample itself receives no PCA results
    sc.tl.pca(adata_temp, n_comps=max_pcs, svd_solver='arpack')
    per_sample_ratios.append(adata_temp.uns['pca']['variance_ratio'])
    del adata_temp  # release the scratch copy before the next sample is copied

# Same nested layout the per-k sweep used to build, in case later cells read it:
# variance_ratios[k - 1][i] holds the ratios of the first k PCs of sample i.
variance_ratios = [
    [ratios[:n_pcs] for ratios in per_sample_ratios]
    for n_pcs in range(1, max_pcs + 1)
]

# One curve and one legend entry per sample (previously every k redrew a prefix
# of the same curve, giving hundreds of overlapping lines and legend entries).
plt.figure(figsize=(10, 6))
pc_numbers = range(1, max_pcs + 1)
for idx, ratios in enumerate(per_sample_ratios, 1):
    plt.plot(pc_numbers, ratios, marker='o', label=f'dataset={idx}')
plt.xlabel('Number of PCs')
plt.ylabel('Explained Variance Ratio')
plt.title('Explained Variance Ratio for Each PC')
plt.legend(bbox_to_anchor=(1.4,0.8))
plt.show()
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:01)
# Compute a 20-component PCA (ARPACK solver) for every sample. The return
# value is not used; the printed AnnData summaries further down show the
# result stored on each object (obsm 'X_pca', varm 'PCs', uns 'pca').
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
for adata in adata_list:
    sc.tl.pca(adata, svd_solver='arpack', n_comps=20)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:01)
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# Scatter plot of each sample in PCA coordinates, one panel per marker gene
# ('CD14', 'CD79A', 'CD3D', 'FCER1A', 'NKG7' and 'CST3').
for adata in adata_list:
    sc.pl.pca(
        adata,
        color=['CD14', 'CD79A', 'CD3D', 'FCER1A', 'NKG7', 'CST3'],
    )
# Legend for the panels above, printed once after all samples are plotted.
# NOTE(review): FCER1A is commonly cited as a dendritic-cell marker, while
# CD16 is encoded by FCGR3A -- confirm whether the gene list or this legend
# reflects the intended marker before relying on the labels.
print("CD14: CD14+ Monocytes, CD79A: B cell, CD3D : CD4+ T cell, FCER1A: CD16+ Monocyte, NKG7: NK cell, CST3: Dendritic cells")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
CD14: CD14+ Monocytes, CD79A: B cell, CD3D : CD4+ T cell, FCER1A: CD16+ Monocyte, NKG7: NK cell, CST3: Dendritic cells
import os
from scipy import io
# Samples to persist, in the same order as the destination paths below.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
# One .h5ad destination per sample.
# NOTE(review): the first two healthy files are spelled "balhealth" (no "y");
# the reload step reads the same spelling, so any rename must change both.
save_files = [
    '/home/jana/balsarc1.h5ad',
    '/home/jana/balsarc2.h5ad',
    '/home/jana/balsarc3.h5ad',
    '/home/jana/balhealth1.h5ad',
    '/home/jana/balhealth2.h5ad',
    '/home/jana/balhealthy3.h5ad',
    '/home/jana/balhealthy4.h5ad',
    '/home/jana/balhealthy5.h5ad',
    '/home/jana/balhealthy6.h5ad',
    '/home/jana/balhealthy7.h5ad',
    '/home/jana/balhealthy8.h5ad',
    '/home/jana/balhealthy9.h5ad',
    '/home/jana/balhealthy10.h5ad',
]
# Write every sample to its paired file; zip pairs the two lists positionally.
for adata, save_file in zip(adata_list, save_files):
    adata.write_h5ad(save_file)
Deleting individual datasets to save space
# Drop the per-sample variables so their memory can be reclaimed.
del (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
)
# `del` only removes the names: the helper list and the last loop variable
# from the save step still reference the same objects, which would keep all
# of them alive. Rebind both so no reference to the datasets remains here.
# NOTE(review): other containers created in earlier cells may also hold
# references -- confirm if memory is still not released.
adata_list = []
adata = None
# Reload the annotated data objects previously written in .h5ad format and
# bind them to the same variable names as before, so the following analysis
# steps can keep using those names.
file_paths = [
    '/home/jana/balsarc1.h5ad',
    '/home/jana/balsarc2.h5ad',
    '/home/jana/balsarc3.h5ad',
    '/home/jana/balhealth1.h5ad',
    '/home/jana/balhealth2.h5ad',
    '/home/jana/balhealthy3.h5ad',
    '/home/jana/balhealthy4.h5ad',
    '/home/jana/balhealthy5.h5ad',
    '/home/jana/balhealthy6.h5ad',
    '/home/jana/balhealthy7.h5ad',
    '/home/jana/balhealthy8.h5ad',
    '/home/jana/balhealthy9.h5ad',
    '/home/jana/balhealthy10.h5ad',
]
# Read every file, keeping the objects in the same order as file_paths.
data_objects = [sc.read_h5ad(path) for path in file_paths]
# Positional unpacking: the order of names must match the order of file_paths.
(
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
) = data_objects
Displaying all samples in this workspace
# Print the AnnData summary (dimensions and annotation keys) of every sample.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]
for adata in adata_list:
    print(adata)
AnnData object with n_obs × n_vars = 11012 × 19931
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 11241 × 20264
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 4547 × 20484
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 5607 × 19466
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 5710 × 19525
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 4336 × 19223
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 4397 × 18328
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 5220 × 19022
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 4731 × 19113
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 5327 × 18906
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 6108 × 19110
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 2438 × 14964
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 3720 × 17524
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
datasets = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]

# Build the neighbor graph (10 neighbors, first 20 PCs) and compute the UMAP
# embedding for each sample.
for dataset in datasets:
    sc.pp.neighbors(dataset, n_neighbors=10, n_pcs=20)
    sc.tl.umap(dataset)

# Leiden resolutions to compare, keyed by the name each result is stored
# under. The same mapping drives both the clustering and the plot, so the two
# cannot drift apart.
leiden_resolutions = {
    "leiden_res0_20": 0.20,
    "leiden_res0_40": 0.40,
    "leiden_res0_60": 0.60,
    "leiden_res0_80": 0.80,
    "leiden_res1": 1.0,
}

# Cluster each sample at every resolution, then show the clusterings side by
# side on its UMAP. Samples are numbered from 1 in list order.
for i, dataset in enumerate(datasets, start=1):
    # Default-parameter run, kept from the original workflow.
    # NOTE(review): presumably later cells read the default key -- confirm.
    sc.tl.leiden(dataset)
    for key, resolution in leiden_resolutions.items():
        sc.tl.leiden(dataset, key_added=key, resolution=resolution)
    # Plot UMAP visualization with the different cluster labels.
    display(f"sample no{i}")
    sc.pl.umap(dataset, color=list(leiden_resolutions), legend_loc="on data")
computing neighbors
using 'X_pca' with n_pcs = 20
/home/jana/my-notebook-venv/lib/python3.8/site-packages/numba/core/typed_passes.py:329: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/rp_tree.py", line 135:
@numba.njit(fastmath=True, nogil=True, parallel=True)
def euclidean_random_projection_split(data, indices, rng_state):
^
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/nndescent.py:91: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/utils.py", line 409:
@numba.njit(parallel=True)
def build_candidates(current_graph, n_vertices, n_neighbors, max_candidates, rng_state):
^
/home/jana/my-notebook-venv/lib/python3.8/site-packages/numba/core/typed_passes.py:329: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/nndescent.py", line 47:
@numba.njit(parallel=True)
def nn_descent(
^
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:14)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:17)
computing neighbors
using 'X_pca' with n_pcs = 20
/home/jana/my-notebook-venv/lib/python3.8/site-packages/numba/core/typed_passes.py:329: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/nndescent.py", line 47:
@numba.njit(parallel=True)
def nn_descent(
^
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:04)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 2 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:13)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:05)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 2 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:14)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:18)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:18)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:12)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:12)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:14)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:13)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:14)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:17)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:07)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 2 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:11)
running Leiden clustering
finished: found 17 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:02)
running Leiden clustering
finished: found 5 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 8 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 10 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:02)
running Leiden clustering
finished: found 15 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:02)
running Leiden clustering
finished: found 17 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:02)
'sample no1'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 20 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 9 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 16 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 20 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:01)
'sample no2'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 21 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 9 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 13 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 16 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 21 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no3'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 19 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 7 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 10 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 16 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no4'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 17 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 7 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 9 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 13 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 17 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no5'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 21 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 6 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 9 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 15 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 21 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no6'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 19 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 5 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 8 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 14 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no7'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 19 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 5 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 7 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 14 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no8'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 19 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 7 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 14 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 17 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no9'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 20 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 6 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 10 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 15 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 20 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no10'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 17 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 5 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 9 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 15 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 17 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no11'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 13 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 4 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 7 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 8 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 13 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no12'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 19 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 6 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 8 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 13 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 16 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 19 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no13'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
Displaying all samples in this workspace
# Gather every per-sample AnnData object in a fixed order: the three
# sarcoidosis BAL samples first, followed by the ten healthy BAL samples.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]

# Print the summary of each sample (dimensions plus the obs/var/uns/obsm/
# varm/obsp keys present) to check that all samples carry matching fields.
# The loop variable keeps the name `adata`: after the loop it remains bound
# to the last sample at notebook scope (later cells may read it -- not
# visible from here).
for adata in adata_list:
    print(adata)
AnnData object with n_obs × n_vars = 11012 × 19931
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 11241 × 20264
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 4547 × 20484
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 5607 × 19466
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 5710 × 19525
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 4336 × 19223
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 4397 × 18328
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 5220 × 19022
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 4731 × 19113
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 5327 × 18906
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 6108 × 19110
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 2438 × 14964
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 3720 × 17524
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
Visualize the most highly expressed genes in each sample
# Plot the most highly expressed genes of each dataset, one plot per dataset.
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]


def highest_expr_genes(adata_list, n_top=20):
    """Show a highest-expressed-genes plot for every dataset in turn.

    Parameters
    ----------
    adata_list
        Iterable of datasets; each one is handed to
        ``sc.pl.highest_expr_genes`` separately.
    n_top
        Forwarded unchanged as the ``n_top`` argument of
        ``sc.pl.highest_expr_genes`` (default 20).

    Notes
    -----
    Each plot is titled with the 1-based position of the dataset in
    ``adata_list`` ("sample 1", "sample 2", ...), not with the sample's
    own name.
    """
    for position, dataset in enumerate(adata_list, start=1):
        # show=False defers display so the title can be set before plt.show().
        sc.pl.highest_expr_genes(dataset, n_top=n_top, show=False)
        plt.title(f'sample {position}')
        plt.show()


# Example usage:
highest_expr_genes(adata_list, n_top=20)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
normalizing counts per cell
finished (0:00:00)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:197: UserWarning: Some cells have zero counts
# Destination .h5ad path for each dataset, in the same order as adata_list below.
# NOTE(review): the first two healthy files are named "balhealth1"/"balhealth2"
# while the rest use "balhealthy<N>" -- confirm whether this is intentional
# before relying on these names when reloading.
save_files = [
    "/home/jana/balsarc1.h5ad",
    "/home/jana/balsarc2.h5ad",
    "/home/jana/balsarc3.h5ad",
    "/home/jana/balhealth1.h5ad",
    "/home/jana/balhealth2.h5ad",
    "/home/jana/balhealthy3.h5ad",
    "/home/jana/balhealthy4.h5ad",
    "/home/jana/balhealthy5.h5ad",
    "/home/jana/balhealthy6.h5ad",
    "/home/jana/balhealthy7.h5ad",
    "/home/jana/balhealthy8.h5ad",
    "/home/jana/balhealthy9.h5ad",
    "/home/jana/balhealthy10.h5ad",
]
adata_list = [
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
]

# Write every dataset to its positionally matching path.
for save_file, adata in zip(save_files, adata_list):
    adata.write_h5ad(save_file)
Deleting individual datasets to save space
# Drop the per-sample variable names to save space.
# NOTE(review): `adata_list` (and the loop variable `adata`) from the save step
# still reference these objects, so the memory is presumably not released until
# those names are rebound or deleted as well -- confirm.
del (
    balsarc1, balsarc2, balsarc3,
    balhealthy1, balhealthy2, balhealthy3, balhealthy4, balhealthy5,
    balhealthy6, balhealthy7, balhealthy8, balhealthy9, balhealthy10,
)